Open In Colab

In [1]:
import matplotlib.pyplot as plt
from sklearn import datasets
import pandas as pd
import pandas as pd
import numpy as np
import seaborn as sns
sns.set(color_codes=True)
import warnings
from sklearn.impute import SimpleImputer
warnings.filterwarnings("ignore")
In [2]:
# Load the main COVID dataset from a CSV file in the working directory.
# Colab/Google Drive download step, kept (disabled) for provenance:
#   link = 'https://drive.google.com/file/d/14aEtw3PE3kZ5fY1i5GrcKQKXVldYRGcS/view?usp=sharing'
#   drive.CreateFile({'id': link.split("/")[-2]}).GetContentFile('owid-covid-data.csv')
covid_csv_path = 'owid-covid-data.csv'
df = pd.read_csv(covid_csv_path)
print(df.shape)
(83211, 59)
In [3]:
# Load the government-index table from a CSV file in the working directory.
# Colab/Google Drive download step, kept (disabled) for provenance:
#   link = 'https://drive.google.com/file/d/13QYgFjQ3ztf_TxgK21CobYI3hCchAqFm/view?usp=sharing'
#   drive.CreateFile({'id': link.split("/")[-2]}).GetContentFile('govt_index.csv')
govt_index_csv_path = 'govt_index.csv'
df_gov = pd.read_csv(govt_index_csv_path)
print(df_gov.shape)
(137690, 51)
In [4]:
# Load the table bound to `df_vaccine` and print its shape.
# NOTE(review): this reads 'govt_index.csv' -- the same file that is loaded into
# `df_gov` -- and the printed shape is identical to df_gov's. The Drive link
# below has a different file id, so a separate vaccine CSV was presumably
# intended; confirm the correct filename before relying on `df_vaccine`.
#  for csv file
#link='https://drive.google.com/file/d/1m1DH6MDIf_A-2FaDASROgiPfyZZmAYMQ/view?usp=sharing'
#id = link.split("/")[-2] 
#downloaded = drive.CreateFile({'id':id})  
#downloaded.GetContentFile('govt_index.csv')
df_vaccine = pd.read_csv('govt_index.csv') 
print(df_vaccine.shape)
(137690, 51)
In [5]:
# Preview the first five rows of the COVID dataset.
df.head(n=5)
Out[5]:
iso_code continent location date total_cases new_cases new_cases_smoothed total_deaths new_deaths new_deaths_smoothed ... gdp_per_capita extreme_poverty cardiovasc_death_rate diabetes_prevalence female_smokers male_smokers handwashing_facilities hospital_beds_per_thousand life_expectancy human_development_index
0 AFG Asia Afghanistan 2020-02-24 1.0 1.0 NaN NaN NaN NaN ... 1803.987 NaN 597.029 9.59 NaN NaN 37.746 0.5 64.83 0.511
1 AFG Asia Afghanistan 2020-02-25 1.0 0.0 NaN NaN NaN NaN ... 1803.987 NaN 597.029 9.59 NaN NaN 37.746 0.5 64.83 0.511
2 AFG Asia Afghanistan 2020-02-26 1.0 0.0 NaN NaN NaN NaN ... 1803.987 NaN 597.029 9.59 NaN NaN 37.746 0.5 64.83 0.511
3 AFG Asia Afghanistan 2020-02-27 1.0 0.0 NaN NaN NaN NaN ... 1803.987 NaN 597.029 9.59 NaN NaN 37.746 0.5 64.83 0.511
4 AFG Asia Afghanistan 2020-02-28 1.0 0.0 NaN NaN NaN NaN ... 1803.987 NaN 597.029 9.59 NaN NaN 37.746 0.5 64.83 0.511

5 rows × 59 columns

In [6]:
# Missing-data overview: frame shape, the 50 columns with the most NaNs,
# and the overall percentage of cells that are empty.
print("total size of data is = ",df.shape)
print()
missing_values_count = df.isnull().sum()
print("Feature with highest number of missing data is ,")
print(missing_values_count.nlargest(n=50))
print()
# df.size is rows * columns. The former np.product(df.shape) relied on an
# alias that was deprecated and then removed in NumPy 2.0.
print("% data which is missing = ",missing_values_count.sum()/df.size*100)
total size of data is =  (83211, 59)

Feature with highest number of missing data is ,
weekly_icu_admissions                    82437
weekly_icu_admissions_per_million        82437
weekly_hosp_admissions                   81938
weekly_hosp_admissions_per_million       81938
people_fully_vaccinated                  77676
people_fully_vaccinated_per_hundred      77676
new_vaccinations                         76192
people_vaccinated                        75583
people_vaccinated_per_hundred            75583
total_vaccinations                       74919
total_vaccinations_per_hundred           74919
icu_patients                             74456
icu_patients_per_million                 74456
hosp_patients                            72642
hosp_patients_per_million                72642
new_vaccinations_smoothed                69837
new_vaccinations_smoothed_per_million    69837
total_tests                              45575
total_tests_per_thousand                 45575
new_tests                                45299
new_tests_per_thousand                   45299
handwashing_facilities                   45059
tests_per_case                           42048
positive_rate                            41456
new_tests_smoothed                       39780
new_tests_smoothed_per_thousand          39780
tests_units                              38360
extreme_poverty                          31887
male_smokers                             24565
female_smokers                           23686
reproduction_rate                        15907
hospital_beds_per_thousand               13883
stringency_index                         12418
total_deaths_per_million                 11949
new_deaths_per_million                   11791
total_deaths                             11522
new_deaths                               11364
aged_65_older                             9069
aged_70_older                             8632
median_age                                8211
gdp_per_capita                            7893
human_development_index                   7378
cardiovasc_death_rate                     7267
diabetes_prevalence                       6205
population_density                        5757
life_expectancy                           4242
continent                                 4030
new_cases_smoothed_per_million            3368
new_deaths_smoothed_per_million           3368
new_cases_smoothed                        2933
dtype: int64

% data which is missing =  40.3873632254862
In [7]:
# Restrict to rows that report weekly ICU admissions, then show the
# per-location mean of every numeric column for that subset.
temp_df = df[df['weekly_icu_admissions'].notna()]
print(temp_df.shape)
# numeric_only=True keeps the string columns (iso_code, continent, date, ...)
# out of the aggregation; without it pandas >= 2.0 raises a TypeError.
print(temp_df.groupby(['location']).mean(numeric_only=True))
(774, 59)
              total_cases     new_cases  new_cases_smoothed  total_deaths  \
location                                                                    
Cyprus       1.740907e+04    181.682927          193.341439    104.951220   
Czechia      4.396877e+05   2465.228070         4014.105228   7167.157895   
Estonia      2.653748e+04    313.227273          357.500000    254.522727   
France       1.572342e+06  14752.034483        13175.852207  45885.120690   
Greece       7.410264e+04    527.525424          763.355881   2362.637931   
Iceland      6.286000e+03      0.000000            4.000000     29.000000   
Ireland      8.791754e+04    666.888889          633.341278   2336.074074   
Israel       2.933627e+05   1541.745763         2026.731186   2336.070175   
Latvia       3.812872e+04    283.230769          404.824154    696.500000   
Lithuania    1.382320e+05   1169.592593         1203.798963   2071.333333   
Malta        1.398769e+04    122.655172          132.581276    195.793103   
Netherlands  4.165076e+05   3531.150000         3458.668237   8470.593220   
Norway       3.047920e+04    179.533333          260.268847    341.879310   
Slovenia     6.458516e+04    363.896552          571.564000   1300.758621   
Spain        1.135766e+06    522.421875         7726.265286  38779.728814   
Sweden       2.332423e+05     90.523810         2041.127016   6868.206897   

             new_deaths  new_deaths_smoothed  total_cases_per_million  \
location                                                                
Cyprus         0.658537             0.989585             19875.662805   
Czechia       54.368421            71.243088             41057.839754   
Estonia        2.863636             3.201295             20005.048659   
France       138.396552           248.455672             23072.559086   
Greece        23.413793            22.910424              7109.493000   
Iceland        0.000000             0.000000             18420.513000   
Ireland        5.222222            12.261907             17805.015981   
Israel        17.333333            15.338949             33893.057085   
Latvia         7.000000             7.472564             20214.546462   
Lithuania     18.296296            19.423333             50777.819111   
Malta          1.620690             1.852103             31679.397897   
Netherlands   25.677966            41.469797             24307.596783   
Norway         0.362069             1.714390              5622.180333   
Slovenia       9.275862            10.243914             31066.506810   
Spain         62.728814           174.560079             24291.965266   
Sweden         2.724138            31.265254             23094.961651   

             new_cases_per_million  new_cases_smoothed_per_million  \
location                                                             
Cyprus                  207.424561                      220.734878   
Czechia                 230.201930                      374.835421   
Estonia                 236.123727                      269.498250   
France                  216.471517                      193.342586   
Greece                   50.611356                       73.237288   
Iceland                   0.000000                       11.722000   
Ireland                 135.057981                      128.263944   
Israel                  178.122322                      234.154254   
Latvia                  150.159179                      214.624026   
Lithuania               429.635370                      442.200741   
Malta                   277.790138                      300.270828   
Netherlands             206.079700                      201.849644   
Norway                   33.116667                       48.009119   
Slovenia                175.040155                      274.931534   
Spain                    11.173641                      165.250841   
Sweden                    8.963413                      202.106333   

             total_deaths_per_million  ...  gdp_per_capita  extreme_poverty  \
location                               ...                                    
Cyprus                     119.821122  ...       32415.132              NaN   
Czechia                    669.266070  ...       32605.906              NaN   
Estonia                    191.869659  ...       29481.252              0.5   
France                     673.318810  ...       38605.671              NaN   
Greece                     226.674224  ...       24574.382              1.5   
Iceland                     84.982000  ...       46482.958              0.2   
Ireland                    473.100556  ...       67335.293              0.2   
Israel                     269.893018  ...       33132.320              0.5   
Latvia                     369.260500  ...       25063.846              0.7   
Lithuania                  760.878815  ...       29524.265              0.7   
Malta                      443.433310  ...       36513.323              0.2   
Netherlands                494.348186  ...       48472.545              NaN   
Norway                      63.062845  ...       64800.057              0.2   
Slovenia                   625.685983  ...       31400.840              NaN   
Spain                      829.428085  ...       34272.360              1.0   
Sweden                     680.069621  ...       46949.283              0.5   

             cardiovasc_death_rate  diabetes_prevalence  female_smokers  \
location                                                                  
Cyprus                     141.171                 9.24            19.6   
Czechia                    227.485                 6.82            30.5   
Estonia                    255.569                 4.02            24.5   
France                      86.060                 4.77            30.1   
Greece                     175.695                 4.55            35.3   
Iceland                    117.992                 5.31            14.3   
Ireland                    126.459                 3.28            23.0   
Israel                      93.320                 6.74            15.4   
Latvia                     350.060                 4.91            25.6   
Lithuania                  342.989                 3.67            21.3   
Malta                      168.711                 8.83            20.9   
Netherlands                109.361                 5.29            24.4   
Norway                     114.316                 5.31            19.6   
Slovenia                   153.493                 7.25            20.1   
Spain                       99.403                 7.17            27.4   
Sweden                     133.982                 4.79            18.8   

             male_smokers  handwashing_facilities  hospital_beds_per_thousand  \
location                                                                        
Cyprus               52.7                     NaN                       3.400   
Czechia              38.3                     NaN                       6.630   
Estonia              39.3                     NaN                       4.690   
France               35.6                     NaN                       5.980   
Greece               52.0                     NaN                       4.210   
Iceland              15.2                     NaN                       2.910   
Ireland              25.7                     NaN                       2.960   
Israel               35.4                     NaN                       2.990   
Latvia               51.0                     NaN                       5.570   
Lithuania            38.0                     NaN                       6.560   
Malta                30.2                     NaN                       4.485   
Netherlands          27.3                     NaN                       3.320   
Norway               20.7                     NaN                       3.600   
Slovenia             25.0                     NaN                       4.500   
Spain                31.4                     NaN                       2.970   
Sweden               18.9                     NaN                       2.220   

             life_expectancy  human_development_index  
location                                               
Cyprus                 80.98                    0.887  
Czechia                79.38                    0.900  
Estonia                78.74                    0.892  
France                 82.66                    0.901  
Greece                 82.24                    0.888  
Iceland                82.99                    0.949  
Ireland                82.30                    0.955  
Israel                 82.97                    0.919  
Latvia                 75.29                    0.866  
Lithuania              75.93                    0.882  
Malta                  82.53                    0.895  
Netherlands            82.28                    0.944  
Norway                 82.40                    0.957  
Slovenia               81.32                    0.917  
Spain                  83.56                    0.904  
Sweden                 82.80                    0.945  

[16 rows x 54 columns]
In [66]:
# INDIA
# Extract India's rows and derive one gap-free series per metric.
df_india = df[df['location']=='India']

# Case and death counts: a missing value is replaced with 0.
df_india_total_cases = df_india['total_cases'].fillna(0)
df_india_total_cases_per_million = df_india['total_cases_per_million'].fillna(0)

df_india_new_cases = df_india['new_cases_smoothed'].fillna(0)
df_india_new_cases_per_million = df_india['new_cases_smoothed_per_million'].fillna(0)

df_india_total_death = df_india['total_deaths'].fillna(0)
df_india_total_deaths_per_million = df_india['total_deaths_per_million'].fillna(0)

# handle outlier here (open TODO carried over from the original cell)
df_india_new_death = df_india['new_deaths_smoothed'].fillna(0)
df_india_new_deaths_per_million = df_india['new_deaths_smoothed_per_million'].fillna(0)

# Reproduction rate: every gap is filled with the first observed value, then
# the final entry is overwritten with the last observed value.
india_reproduction_observed = df_india['reproduction_rate'].dropna()
df_india_reproduction = df_india['reproduction_rate'].fillna(india_reproduction_observed.iloc[0])
df_india_reproduction.iloc[-1] = india_reproduction_observed.iloc[-1]

# Stringency index: back-fill gaps from the next valid value; anything still
# missing afterwards (trailing NaNs) gets the mean of the observed values.
# .bfill() replaces the deprecated fillna(method='bfill').
df_india_stringency = df_india['stringency_index']
df_india_stringency = df_india_stringency.bfill().fillna(df_india_stringency.dropna().mean())
plt.plot(df_india_stringency);
Out[66]:
[<matplotlib.lines.Line2D at 0x1edb2091c40>]
In [68]:
# ISRAEL
# Extract Israel's rows and derive one gap-free series per metric.
df_israel = df[df['location']=='Israel']

# Case and death counts: a missing value is replaced with 0.
df_israel_total_cases = df_israel['total_cases'].fillna(0)

df_israel_total_cases_per_million = df_israel['total_cases_per_million'].fillna(0)
# The final row is overwritten with the previous row's value.
# NOTE(review): presumably the latest row is incomplete for Israel -- confirm.
df_israel_total_cases_per_million.iloc[-1] = df_israel_total_cases_per_million.iloc[-2]

df_israel_new_cases = df_israel['new_cases_smoothed'].fillna(0)
# Fix: the filled series used to be bound to the misspelled name
# `df_israela_new_cases_per_million`, leaving this variable with its NaNs.
df_israel_new_cases_per_million = df_israel['new_cases_smoothed_per_million'].fillna(0)

df_israel_total_death = df_israel['total_deaths'].fillna(0)

df_israel_total_deaths_per_million = df_israel['total_deaths_per_million'].fillna(0)
# Same last-row patch as for total cases per million above.
df_israel_total_deaths_per_million.iloc[-1] = df_israel_total_deaths_per_million.iloc[-2]

# handle outlier here (open TODO carried over from the original cell)
df_israel_new_death = df_israel['new_deaths_smoothed'].fillna(0)
df_israel_new_deaths_per_million = df_israel['new_deaths_smoothed_per_million'].fillna(0)

# Reproduction rate: back-fill gaps from the next valid value; trailing NaNs
# that remain get the last observed value.
# .bfill() replaces the deprecated fillna(method='bfill').
israel_reproduction_last = df_israel['reproduction_rate'].dropna().iloc[-1]
df_israel_reproduction = df_israel['reproduction_rate'].bfill().fillna(israel_reproduction_last)

# Stringency index: back-fill, then the mean of the observed values for the rest.
df_israel_stringency = df_israel['stringency_index']
df_israel_stringency = df_israel_stringency.bfill().fillna(df_israel_stringency.dropna().mean())
plt.plot(df_israel_stringency);
Out[68]:
[<matplotlib.lines.Line2D at 0x1edad818b20>]
In [67]:
# USA
# Extract the United States' rows and derive one gap-free series per metric.
df_usa = df[df['location']=='United States']

# Case and death counts: a missing value is replaced with 0.
df_usa_total_cases = df_usa['total_cases'].fillna(0)
df_usa_total_cases_per_million = df_usa['total_cases_per_million'].fillna(0)

df_usa_new_cases = df_usa['new_cases_smoothed'].fillna(0)
df_usa_new_cases_per_million = df_usa['new_cases_smoothed_per_million'].fillna(0)

df_usa_total_death = df_usa['total_deaths'].fillna(0)
df_usa_total_deaths_per_million = df_usa['total_deaths_per_million'].fillna(0)

df_usa_new_death = df_usa['new_deaths_smoothed'].fillna(0)
df_usa_new_deaths_per_million = df_usa['new_deaths_smoothed_per_million'].fillna(0)

# Reproduction rate: every gap is filled with the first observed value, then
# the final entry is overwritten with the last observed value.
usa_reproduction_observed = df_usa['reproduction_rate'].dropna()
df_usa_reproduction = df_usa['reproduction_rate'].fillna(usa_reproduction_observed.iloc[0])
df_usa_reproduction.iloc[-1] = usa_reproduction_observed.iloc[-1]

# Stringency index: back-fill gaps from the next valid value; anything still
# missing afterwards (trailing NaNs) gets the mean of the observed values.
# .bfill() replaces the deprecated fillna(method='bfill').
df_usa_stringency = df_usa['stringency_index']
df_usa_stringency = df_usa_stringency.bfill().fillna(df_usa_stringency.dropna().mean())
plt.plot(df_usa_stringency);
Out[67]:
[<matplotlib.lines.Line2D at 0x1edb20455b0>]
In [69]:
# Italy
# Extract Italy's rows and derive one gap-free series per metric.
df_italy = df[df['location']=='Italy']

# Case and death counts: a missing value is replaced with 0.
df_italy_total_cases = df_italy['total_cases'].fillna(0)
df_italy_total_cases_per_million = df_italy['total_cases_per_million'].fillna(0)

df_italy_new_cases = df_italy['new_cases_smoothed'].fillna(0)
df_italy_new_cases_per_million = df_italy['new_cases_smoothed_per_million'].fillna(0)

df_italy_total_death = df_italy['total_deaths'].fillna(0)
df_italy_total_deaths_per_million = df_italy['total_deaths_per_million'].fillna(0)

# New deaths: negative values are clipped to 0 before the gaps are filled.
df_italy_new_death = df_italy['new_deaths_smoothed'].clip(lower=0).fillna(0)
df_italy_new_deaths_per_million = df_italy['new_deaths_smoothed_per_million'].clip(lower=0).fillna(0)

# Reproduction rate: every gap is filled with the first observed value, then
# the final entry is overwritten with the last observed value.
italy_reproduction_observed = df_italy['reproduction_rate'].dropna()
df_italy_reproduction = df_italy['reproduction_rate'].fillna(italy_reproduction_observed.iloc[0])
df_italy_reproduction.iloc[-1] = italy_reproduction_observed.iloc[-1]

# Stringency index: back-fill gaps from the next valid value; anything still
# missing afterwards (trailing NaNs) gets the mean of the observed values.
# .bfill() replaces the deprecated fillna(method='bfill').
df_italy_stringency = df_italy['stringency_index']
df_italy_stringency = df_italy_stringency.bfill().fillna(df_italy_stringency.dropna().mean())
plt.plot(df_italy_stringency);
Out[69]:
[<matplotlib.lines.Line2D at 0x1edac869430>]
In [70]:
# New Zealand
# Extract New Zealand's rows and derive one gap-free series per metric.
df_nz = df[df['location']=='New Zealand']

# Case and death counts: a missing value is replaced with 0.
df_nz_total_cases = df_nz['total_cases'].fillna(0)
df_nz_total_cases_per_million = df_nz['total_cases_per_million'].fillna(0)

df_nz_new_cases = df_nz['new_cases_smoothed'].fillna(0)
df_nz_new_cases_per_million = df_nz['new_cases_smoothed_per_million'].fillna(0)

df_nz_total_death = df_nz['total_deaths'].fillna(0)
df_nz_total_deaths_per_million = df_nz['total_deaths_per_million'].fillna(0)

# New deaths: gaps become 0 and negative values are clipped to 0.
# The per-million series is only gap-filled (not clipped), as in the original.
df_nz_new_death = df_nz['new_deaths_smoothed'].fillna(0).clip(lower=0)
df_nz_new_deaths_per_million = df_nz['new_deaths_smoothed_per_million'].fillna(0)

# Reproduction rate: every gap is filled with the first observed value, then
# the final entry is overwritten with the last observed value.
nz_reproduction_observed = df_nz['reproduction_rate'].dropna()
df_nz_reproduction = df_nz['reproduction_rate'].fillna(nz_reproduction_observed.iloc[0])
df_nz_reproduction.iloc[-1] = nz_reproduction_observed.iloc[-1]

# Stringency index: back-fill gaps from the next valid value; anything still
# missing afterwards (trailing NaNs) gets the mean of the observed values.
# .bfill() replaces the deprecated fillna(method='bfill').
df_nz_stringency = df_nz['stringency_index']
df_nz_stringency = df_nz_stringency.bfill().fillna(df_nz_stringency.dropna().mean())
plt.plot(df_nz_stringency);
Out[70]:
[<matplotlib.lines.Line2D at 0x1edb1e034f0>]
In [71]:
# WORLD
# Extract the aggregate "World" rows; missing values in each series become 0.
df_w = df[df['location']=='World']

df_w_total_cases = df_w['total_cases'].fillna(0)

df_w_total_deaths = df_w['total_deaths'].fillna(0)

df_w_total_tests = df_w['total_tests'].fillna(0)

df_w_new_cases = df_w['new_cases'].fillna(0)

# Fix: this series used to be overwritten with df_w_new_cases.fillna(0),
# i.e. the un-smoothed daily counts, because of a copy-paste slip.
df_w_new_cases_smoothed = df_w['new_cases_smoothed'].fillna(0)
In [72]:
# Stringency Index
# Overlay the five countries' stringency series on one figure. Each series is
# converted with to_numpy(), so the x axis is the row position within the
# country rather than the DataFrame index.
plt.figure()
stringency_by_label = {
    'ind': df_india_stringency,
    'isr': df_israel_stringency,
    'usa': df_usa_stringency,
    'ita': df_italy_stringency,
    'nz': df_nz_stringency,
}
for legend_label, stringency_series in stringency_by_label.items():
    plt.plot(stringency_series.to_numpy(), label=legend_label)
plt.legend()

plt.grid()
In [73]:
import pycountry
import plotly.express as px

# Build a location -> ISO alpha-3 lookup with pycountry's fuzzy search and
# store it in a new `iso_alpha` column.
# Note: df1 is an alias of df (not a copy), so the column is added to df too.
df1 = df
list_countries = df1['location'].dropna().unique().tolist()
d_country_code = {}
for country in list_countries:
    try:
        country_data = pycountry.countries.search_fuzzy(country)
        country_code = country_data[0].alpha_3
    except LookupError:
        # search_fuzzy raises LookupError when nothing matches (e.g. aggregate
        # rows such as continents); those locations get a blank code.
        # Catching only LookupError keeps unrelated errors visible.
        print('could not add ISO 3 code for ->', country)
        country_code = ' '
    d_country_code[country] = country_code
# One vectorised lookup instead of one boolean-mask assignment per country.
df1['iso_alpha'] = df1['location'].map(d_country_code)
could not add ISO 3 code for -> Asia
could not add ISO 3 code for -> Cape Verde
could not add ISO 3 code for -> Democratic Republic of Congo
could not add ISO 3 code for -> Europe
could not add ISO 3 code for -> European Union
could not add ISO 3 code for -> Faeroe Islands
could not add ISO 3 code for -> International
could not add ISO 3 code for -> Laos
could not add ISO 3 code for -> Micronesia (country)
could not add ISO 3 code for -> North America
could not add ISO 3 code for -> Northern Cyprus
could not add ISO 3 code for -> Oceania
could not add ISO 3 code for -> South America
could not add ISO 3 code for -> South Korea
could not add ISO 3 code for -> World
In [74]:
# Animated world map of total cases, with one animation frame per date.
choropleth_options = dict(
    locations="iso_alpha",               # column holding the country codes
    color="total_cases",                 # column that drives the colour
    hover_name="location",               # label shown on hover
    animation_frame="date",              # column that drives the animation
    projection="natural earth",          # map projection
    color_continuous_scale='Peach',      # colour scale
    range_color=[0, 40000000],           # fixed colour range across all frames
)
fig = px.choropleth(df1, **choropleth_options)
fig.show()
In [78]:
# Bar chart (log y axis) of total cases per million for the five countries.
# Fix: the last label was misspelled "Isreal".
x = np.array(["India","USA","New Zealand","Italy","Israel"])
# Each bar uses the second-to-last row of the country's series.
# NOTE(review): presumably because the most recent row can be incomplete -- confirm.
y = [
    df_india_total_cases_per_million.values[-2],
    df_usa_total_cases_per_million.values[-2],
    df_nz_total_cases_per_million.values[-2],
    df_italy_total_cases_per_million.values[-2],
    df_israel_total_cases_per_million.values[-2],
]
# Single figure: the extra plt.figure(2) call only produced an empty figure.
plt.figure(figsize=(18, 10))
ax = plt.subplot(111)
r2 = ax.bar(x, y, width=0.25, color='b', align='center')
# The on/off flag is passed positionally: the `b=` keyword was renamed to
# `visible=` in Matplotlib 3.5 and is rejected by current releases.
plt.grid(True, alpha = 0.8, linewidth = 1)
def autolabel(rects):
    """Write each bar's height (one decimal place) centred just above the bar.

    Draws on the module-level `ax`; `rects` is the container returned by `ax.bar`.
    """
    for rect in rects:
        height = rect.get_height()
        ax.text(rect.get_x() + rect.get_width()/2, 1+height,'%0.1f' % float(height),ha='center', va='bottom')
plt.title('Total Cases per Million Population(log)', fontsize=30)
plt.ylabel('Count per million', fontsize=20)
plt.xlabel('Country', fontsize=20)
autolabel(r2)
plt.yscale("log")
plt.show()
<Figure size 432x288 with 0 Axes>
In [79]:
# Pearson correlation between total cases, total tests and total deaths,
# computed over all rows of df and drawn as an annotated heatmap.
plt.figure(figsize=(6, 4))
pearson_columns = ['total_cases','total_tests','total_deaths']
pearson_matrix = df[pearson_columns].corr()
heatmap = sns.heatmap(pearson_matrix, vmin=0, vmax=1, annot=True)
heatmap.set_title('Pearson Correlation Heatmap', fontdict={'fontsize':12}, pad=12);
In [80]:
# Spearman (rank) correlation between total cases, total tests and total
# deaths, computed over all rows of df and drawn as an annotated heatmap.
plt.figure(figsize=(6, 4))
spearman_columns = ['total_cases','total_tests','total_deaths']
corr = df[spearman_columns].corr(method='spearman')
heatmap = sns.heatmap(corr, annot=True, vmin=0, vmax=1)
heatmap.set_title('Spearman Correlation Heatmap', fontdict={'fontsize':12}, pad=12);
In [81]:
# Bar chart (log y axis) of the tests-to-cases ratio for the five countries.
x = np.array(["India","USA","New Zealand","Italy","Israel"])
# NOTE(review): the df_*_total_tests series are not created in any cell visible
# in this part of the notebook; this cell needs them to exist in the kernel.
# The negative offsets are hand-picked per country -- presumably the last row
# with a reported test total; TODO confirm.
y = [
    np.divide(df_india_total_tests.values, df_india_total_cases.values)[-3],
    np.divide(df_usa_total_tests.values, df_usa_total_cases.values)[-6],
    np.divide(df_nz_total_tests.values, df_nz_total_cases.values)[-3],
    np.divide(df_italy_total_tests.values, df_italy_total_cases.values)[-3],
    np.divide(df_israel_total_tests.values, df_israel_total_cases.values)[-4],
]
# Single figure: the extra plt.figure(2) call only produced an empty figure.
plt.figure(figsize=(18, 10))
ax = plt.subplot(111)
r2 = ax.bar(x, y, width=0.25, color='b', align='center')
# The on/off flag is passed positionally: the `b=` keyword was renamed to
# `visible=` in Matplotlib 3.5 and is rejected by current releases.
plt.grid(True, alpha = 0.8, linewidth = 1)
def autolabel(rects):
    """Write each bar's height (one decimal place) centred just above the bar.

    Draws on the module-level `ax`; `rects` is the container returned by `ax.bar`.
    """
    for rect in rects:
        height = rect.get_height()
        ax.text(rect.get_x() + rect.get_width()/2, 1+height,'%0.1f' % float(height),ha='center', va='bottom')
plt.title('Test-Cases ratio', fontsize=30)
plt.ylabel('Ratio(log)', fontsize=30)
plt.xlabel('Country', fontsize=30)
plt.xticks(fontsize= 20)
plt.yscale("log")
autolabel(r2)
plt.show()
<Figure size 432x288 with 0 Axes>
In [82]:
import plotly.express as px

# One row per country: (location, tests series, cases series, row offset).
# NOTE(review): the offsets differ per country, presumably to skip trailing rows
# without test data — confirm.
_snapshot = (
    ("India", df_india_total_tests, df_india_total_cases, -3),
    ("USA", df_usa_total_tests, df_usa_total_cases, -6),
    ("New Zealand", df_nz_total_tests, df_nz_total_cases, -3),
    ("Italy", df_italy_total_tests, df_italy_total_cases, -3),
    ("Israel", df_israel_total_tests, df_israel_total_cases, -4),
)
x = np.array([tests.values[k] for name, tests, cases, k in _snapshot])
y = np.array([cases.values[k] for name, tests, cases, k in _snapshot])
# Tests-per-case ratio at the same offsets.
tcr = np.array([np.divide(tests.values, cases.values)[k] for name, tests, cases, k in _snapshot])

# Assemble the plotting table in one step.
dat = pd.DataFrame(
    {
        'total_tests': x,
        'total_cases': y,
        'tcr': tcr,
        "location": [name for name, tests, cases, k in _snapshot],
    },
    columns=['total_tests', 'total_cases', 'tcr', "location"],
)

# Bubble chart: marker size and colour both encode the tests-per-case ratio.
fig = px.scatter(
    dat,
    x="total_tests",
    y="total_cases",
    size="tcr",
    color="tcr",
    hover_name="location",
    size_max=70,
    text="location",
)
fig.update_traces(textposition='top center')
fig.show()
In [83]:
# Load the search-interest table and thin each total-cases series to the rows
# whose index label is a multiple of 7.
# NOTE(review): the per-country assignments overwrite the original series, so
# every cell executed after this one sees the thinned data.
search = pd.read_csv('searchCOVID.csv')
world = df_w_total_cases.loc[df_w_total_cases.index % 7 == 0]
df_india_total_cases = df_india_total_cases.loc[df_india_total_cases.index % 7 == 0]
df_usa_total_cases = df_usa_total_cases.loc[df_usa_total_cases.index % 7 == 0]
df_nz_total_cases = df_nz_total_cases.loc[df_nz_total_cases.index % 7 == 0]
df_israel_total_cases = df_israel_total_cases.loc[df_israel_total_cases.index % 7 == 0]
df_italy_total_cases = df_italy_total_cases.loc[df_italy_total_cases.index % 7 == 0]
In [84]:
# Global Google search interest compared with worldwide total cases.
# Cases are rescaled so that their peak equals 100; Series.max() is used because
# the builtin max() returns NaN whenever the first value is NaN.
plt.figure(figsize=(18, 10))
plt.plot(range(len(search['Week'])), search['covid'], color='r')
plt.plot(range(len(search['Week'])), search['vaccine'], color='g')
plt.plot(range(len(search['Week'])), (world / world.max() * 100).to_numpy(), color='b')
plt.legend(['Searches for COVID','Searches for its Vaccine','Total global cases'], loc=1, fontsize=15)
# Flag passed positionally: the `b=` keyword was renamed to `visible` in
# Matplotlib 3.5 and later removed.
plt.grid(True, alpha = 0.8, linewidth = 1)
plt.title('Google searches(Global)', fontsize=30)
plt.ylabel('Value', fontsize=20)
plt.xlabel('Week', fontsize=20)
# A leftover autolabel(r2) call was dropped here: it wrote bar labels onto the
# axes of an earlier bar chart and had no effect on this figure.
plt.show()
In [85]:
# Google search interest compared with total cases for India.
# Cases are rescaled so that their peak equals 100; Series.max() is used because
# the builtin max() returns NaN whenever the first value is NaN.
plt.figure(figsize=(18, 10))
plt.plot(range(len(search['Week'])), search['india_c'], color='r')
plt.plot(range(len(search['Week'])), search['india_v'], color='g')
plt.plot((df_india_total_cases / df_india_total_cases.max() * 100).values, color='b')
plt.legend(['Searches for COVID','Searches for its Vaccine','Total cases'], loc=1, fontsize=15)
# Flag passed positionally: the `b=` keyword was renamed to `visible` in
# Matplotlib 3.5 and later removed.
plt.grid(True, alpha = 0.8, linewidth = 1)
plt.title('Google searches(India)', fontsize=30)
plt.ylabel('Value', fontsize=20)
plt.xlabel('Week', fontsize=20)
plt.show()
In [86]:
# Google search interest compared with total cases for the USA.
# Cases are rescaled so that their peak equals 100; Series.max() is used because
# the builtin max() returns NaN whenever the first value is NaN.
plt.figure(figsize=(18, 10))
plt.plot(range(len(search['Week'])), search['usa_c'], color='r')
plt.plot(range(len(search['Week'])), search['usa_v'], color='g')
plt.plot((df_usa_total_cases / df_usa_total_cases.max() * 100).values, color='b')
plt.legend(['Searches for COVID','Searches for its Vaccine','Total cases'], loc=1, fontsize=15)
# Flag passed positionally: the `b=` keyword was renamed to `visible` in
# Matplotlib 3.5 and later removed.
plt.grid(True, alpha = 0.8, linewidth = 1)
plt.title('Google searches(USA)', fontsize=30)
plt.ylabel('Value', fontsize=20)
plt.xlabel('Week', fontsize=20)
plt.show()
In [87]:
# Google search interest compared with total cases for New Zealand.
# Cases are rescaled so that their peak equals 100; Series.max() is used because
# the builtin max() returns NaN whenever the first value is NaN.
plt.figure(figsize=(18, 10))
plt.plot(range(len(search['Week'])), search['nz_c'], color='r')
plt.plot(range(len(search['Week'])), search['nz_v'], color='g')
plt.plot((df_nz_total_cases / df_nz_total_cases.max() * 100).values, color='b')
plt.legend(['Searches for COVID','Searches for its Vaccine','Total cases'], loc=1, fontsize=15)
# Flag passed positionally: the `b=` keyword was renamed to `visible` in
# Matplotlib 3.5 and later removed.
plt.grid(True, alpha = 0.8, linewidth = 1)
plt.title('Google searches(New Zealand)', fontsize=30)
plt.ylabel('Value', fontsize=20)
plt.xlabel('Week', fontsize=20)
plt.show()
In [88]:
# Google search interest compared with total cases for Italy.
# Cases are rescaled so that their peak equals 100; Series.max() is used because
# the builtin max() returns NaN whenever the first value is NaN.
plt.figure(figsize=(18, 10))
plt.plot(range(len(search['Week'])), search['italy_c'], color='r')
plt.plot(range(len(search['Week'])), search['italy_v'], color='g')
plt.plot((df_italy_total_cases / df_italy_total_cases.max() * 100).values, color='b')
plt.legend(['Searches for COVID','Searches for its Vaccine','Total cases'], loc=1, fontsize=15)
# Flag passed positionally: the `b=` keyword was renamed to `visible` in
# Matplotlib 3.5 and later removed.
plt.grid(True, alpha = 0.8, linewidth = 1)
plt.title('Google searches(Italy)', fontsize=30)
plt.ylabel('Value', fontsize=20)
plt.xlabel('Week', fontsize=20)
plt.show()
In [89]:
# Google search interest compared with total cases for Israel.
# Cases are rescaled so that their peak equals 100; Series.max() is used because
# the builtin max() returns NaN whenever the first value is NaN.
plt.figure(figsize=(18, 10))
plt.plot(range(len(search['Week'])), search['israel_c'], color='r')
plt.plot(range(len(search['Week'])), search['israel_v'], color='g')
# NOTE(review): the last two points are dropped, presumably to match the length
# of the search table — confirm.
plt.plot((df_israel_total_cases / df_israel_total_cases.max() * 100).values[0:-2], color='b')
plt.legend(['Searches for COVID','Searches for its Vaccine','Total cases'], loc=1, fontsize=15)
# Flag passed positionally: the `b=` keyword was renamed to `visible` in
# Matplotlib 3.5 and later removed.
plt.grid(True, alpha = 0.8, linewidth = 1)
plt.title('Google searches(Israel)', fontsize=30)
plt.ylabel('Value', fontsize=20)
plt.xlabel('Week', fontsize=20)
plt.show()
In [90]:
import math
from statsmodels.tsa.arima_model import ARIMA
import statsmodels.api as sm
import statsmodels.tsa.api as smt
import statsmodels.formula.api as smf
from sklearn.metrics import mean_squared_error
In [91]:
from sklearn.preprocessing import MinMaxScaler
def model(treand, title='World COVID cases', random_state=None):
    """Fit a small MLP to a series and plot its in-sample one-step predictions.

    The values are min-max scaled, cut into sliding windows of three
    consecutive points, and an ``MLPRegressor`` is trained to predict the point
    that follows each window. Predictions are made on the same windows that
    were used for training, so the figure shows the in-sample fit.

    Parameters
    ----------
    treand : object exposing ``.values`` (e.g. a pandas Series)
        Series to model.
    title : str, optional
        Title of the comparison plot.
    random_state : int or None, optional
        Seed forwarded to ``MLPRegressor``; ``None`` leaves the fit unseeded.

    Returns
    -------
    numpy.ndarray
        Predictions in the original units, shape ``(n_windows, 1)``.
    """
    from sklearn.neural_network import MLPRegressor

    scaler = MinMaxScaler()
    treand_s = scaler.fit_transform(treand.values.reshape(-1,1))

    def create_dataset(dataset, look_back=3):
        """Return (windows, following values) for every full window in ``dataset``."""
        # len - look_back windows exist; the earlier "- 1" silently dropped the last one.
        n_windows = max(len(dataset) - look_back, 0)
        dataX = [dataset[i:(i+look_back)] for i in range(n_windows)]
        dataY = [dataset[i + look_back] for i in range(n_windows)]
        return np.array(dataX), np.array(dataY)

    x, y = create_dataset(treand_s)
    x_train = x.reshape(x.shape[0], x.shape[1])  # (n, look_back, 1) -> (n, look_back)
    y_train = y.ravel()  # the regressor expects a 1-D target, not a column vector

    clf = MLPRegressor(activation = 'tanh',solver='adam', random_state=random_state)
    clf.fit(x_train,y_train)
    train_pred = scaler.inverse_transform(clf.predict(x_train).reshape(-1,1))

    plt.figure(figsize=(18, 10))
    plt.plot(train_pred,label='Prediction')
    plt.plot(scaler.inverse_transform(y_train.reshape(-1,1)),color='red', label='Original')
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.title(title)
    plt.show()
    return train_pred
# Seeded so that re-running the notebook reproduces the same fit.
t1 = model(df_w_total_cases, random_state=0)
In [92]:
from sklearn.preprocessing import MinMaxScaler
def model(treand, title='New Cases Globally', random_state=None):
    """Fit a small MLP to a series and plot its in-sample one-step predictions.

    The values are min-max scaled, cut into sliding windows of three
    consecutive points, and an ``MLPRegressor`` is trained to predict the point
    that follows each window. Predictions are made on the same windows that
    were used for training, so the figure shows the in-sample fit.

    Parameters
    ----------
    treand : object exposing ``.values`` (e.g. a pandas Series)
        Series to model.
    title : str, optional
        Title of the comparison plot. The default is kept for backward
        compatibility.
    random_state : int or None, optional
        Seed forwarded to ``MLPRegressor``; ``None`` leaves the fit unseeded.

    Returns
    -------
    numpy.ndarray
        Predictions in the original units, shape ``(n_windows, 1)``.
    """
    from sklearn.neural_network import MLPRegressor

    scaler = MinMaxScaler()
    treand_s = scaler.fit_transform(treand.values.reshape(-1,1))

    def create_dataset(dataset, look_back=3):
        """Return (windows, following values) for every full window in ``dataset``."""
        # len - look_back windows exist; the earlier "- 1" silently dropped the last one.
        n_windows = max(len(dataset) - look_back, 0)
        dataX = [dataset[i:(i+look_back)] for i in range(n_windows)]
        dataY = [dataset[i + look_back] for i in range(n_windows)]
        return np.array(dataX), np.array(dataY)

    x, y = create_dataset(treand_s)
    x_train = x.reshape(x.shape[0], x.shape[1])  # (n, look_back, 1) -> (n, look_back)
    y_train = y.ravel()  # the regressor expects a 1-D target, not a column vector

    clf = MLPRegressor(activation = 'tanh',solver='adam', random_state=random_state)
    clf.fit(x_train,y_train)
    # Predictions are evaluated on the training windows; there is no held-out split.
    train_pred = scaler.inverse_transform(clf.predict(x_train).reshape(-1,1))

    plt.figure(figsize=(18, 10))
    plt.plot(train_pred,label='Prediction')
    plt.plot(scaler.inverse_transform(y_train.reshape(-1,1)),color='red', label='Original')
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.title(title)
    plt.show()
    return train_pred
# The series is India's total cases, so the plot is titled accordingly; the fit
# is seeded so that re-running the notebook reproduces it.
t1 = model(df_india_total_cases, title='Total Cases (India)', random_state=0)
In [93]:
# Min-max scale the India total-cases series as a single feature column.
scaler = MinMaxScaler()
treand = df_india_total_cases
treand_s = scaler.fit_transform(treand.to_numpy().reshape(-1, 1))
def create_dataset(dataset, look_back=3):
    """Cut a sequence into sliding windows and the value following each window.

    Parameters
    ----------
    dataset : sequence or numpy.ndarray
        Ordered observations; sliced along its first axis.
    look_back : int, optional
        Number of consecutive observations per window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(windows, targets)`` where ``windows[i]`` is
        ``dataset[i:i + look_back]`` and ``targets[i]`` is
        ``dataset[i + look_back]``. Both are empty when ``dataset`` has no more
        than ``look_back`` items.
    """
    # len - look_back windows have a following value; the earlier "- 1" bound
    # silently discarded the last of them.
    n_windows = max(len(dataset) - look_back, 0)
    dataX = [dataset[i:(i + look_back)] for i in range(n_windows)]
    dataY = [dataset[i + look_back] for i in range(n_windows)]
    return np.array(dataX), np.array(dataY)
# Build the sliding windows, then flatten each window to one row of values.
x, y = create_dataset(treand_s)
x = x.reshape(x.shape[:2])
# The whole set is used for training; no rows are held out.
x_train = x[:len(x)]
y_train = y[:len(x)]
In [94]:
# Total cases per million population, one line per country.
plt.figure(figsize=(8,5))
for _series, _country in (
    (df_india_total_cases_per_million, 'India'),
    (df_israel_total_cases_per_million, 'Israel'),
    (df_usa_total_cases_per_million, 'USA'),
    (df_italy_total_cases_per_million, 'Italy'),
    (df_nz_total_cases_per_million, 'New Zealand'),
):
    plt.plot(_series.to_numpy(), label=_country)
plt.title('Total cases per million population')
plt.xlabel('Days')
plt.ylabel('Cases per million population')
plt.grid()
plt.legend()
plt.show()
In [95]:
# New cases per million population per day, one line per country.
plt.figure(figsize=(8,5))
for _series, _country in (
    (df_india_new_cases_per_million, 'India'),
    (df_israel_new_cases_per_million, 'Israel'),
    (df_usa_new_cases_per_million, 'USA'),
    (df_italy_new_cases_per_million, 'Italy'),
    (df_nz_new_cases_per_million, 'New Zealand'),
):
    plt.plot(_series.to_numpy(), label=_country)
plt.title('New cases per million population')
plt.xlabel('Days')
plt.ylabel('Cases per million population')
plt.grid()
plt.legend()
plt.show()

# ISRAEL HAS HERD IMMUNITY
In [96]:
# Total cases, one line per country.
# By the time this cell runs, the *_total_cases series have been thinned to
# every 7th row by the search-trend preparation cell, so the x axis counts
# weekly samples and is labelled 'Weeks' instead of 'Days'.
plt.figure(figsize=(8,5))
for _series, _country in (
    (df_india_total_cases, 'India'),
    (df_israel_total_cases, 'Israel'),
    (df_usa_total_cases, 'USA'),
    (df_italy_total_cases, 'Italy'),
    (df_nz_total_cases, 'New Zealand'),
):
    plt.plot(_series.to_numpy(), label=_country)
plt.grid()
plt.xlabel('Weeks')
plt.ylabel('Total Cases ')
plt.legend()
plt.title('Total cases')
plt.show()
In [97]:
# New cases per day, one line per country.
plt.figure(figsize=(8,5))
for _series, _country in (
    (df_india_new_cases, 'India'),
    (df_israel_new_cases, 'Israel'),
    (df_usa_new_cases, 'USA'),
    (df_italy_new_cases, 'Italy'),
    (df_nz_new_cases, 'New Zealand'),
):
    plt.plot(_series.to_numpy(), label=_country)
plt.title('New cases')
plt.xlabel('Days')
plt.ylabel('New Cases ')
plt.grid()
plt.legend()
plt.show()
In [98]:
# Total deaths per million population, one line per country.
plt.figure(figsize=(8,5))
for _series, _country in (
    (df_india_total_deaths_per_million, 'India'),
    (df_israel_total_deaths_per_million, 'Israel'),
    (df_usa_total_deaths_per_million, 'USA'),
    (df_italy_total_deaths_per_million, 'Italy'),
    (df_nz_total_deaths_per_million, 'New Zealand'),
):
    plt.plot(_series.to_numpy(), label=_country)
plt.title('Total Deaths per million population')
plt.xlabel('Days')
plt.ylabel('Deaths per million population')
plt.grid()
plt.legend()
plt.show()
# The series differ in length; print two of the shapes to show it.
print(df_nz_total_deaths_per_million.shape)
print(df_usa_total_deaths_per_million.shape)
(419,)
(456,)
In [99]:
# New deaths per million population, one line per country.
plt.figure(figsize=(8,5))
for _series, _country in (
    (df_india_new_deaths_per_million, 'India'),
    (df_israel_new_deaths_per_million, 'Israel'),
    (df_usa_new_deaths_per_million, 'USA'),
    (df_italy_new_deaths_per_million, 'Italy'),
    (df_nz_new_deaths_per_million, 'New Zealand'),
):
    plt.plot(_series.to_numpy(), label=_country)
plt.title('New Deaths per million population')
plt.xlabel('Days')
plt.ylabel('Deaths per million population')
plt.grid()
plt.legend()
plt.show()
In [100]:
# New deaths, one line per country.
plt.figure(figsize=(8,5))
for _series, _country in (
    (df_india_new_death, 'India'),
    (df_israel_new_death, 'Israel'),
    (df_usa_new_death, 'USA'),
    (df_italy_new_death, 'Italy'),
    (df_nz_new_death, 'New Zealand'),
):
    plt.plot(_series.to_numpy(), label=_country)
plt.title('New Deaths ')
plt.xlabel('Days')
plt.ylabel('Deaths ')
plt.grid()
plt.legend()
plt.show()
In [101]:
# Total deaths, one line per country.
plt.figure(figsize=(8,5))
for _series, _country in (
    (df_india_total_death, 'India'),
    (df_israel_total_death, 'Israel'),
    (df_usa_total_death, 'USA'),
    (df_italy_total_death, 'Italy'),
    (df_nz_total_death, 'New Zealand'),
):
    plt.plot(_series.to_numpy(), label=_country)
plt.title('Total Deaths ')
plt.xlabel('Days')
plt.ylabel('Deaths ')
plt.grid()
plt.legend()
plt.show()
In [102]:
# Country wise stringency index analysis
#
# One twin-axis figure per country: new cases rescaled so the peak is 100
# (left axis, red) against the stringency index (right axis).
# Each row: (country, cases series, stringency series, cases legend loc, index legend loc).
for _country, _cases, _stringency, _cases_loc, _index_loc in (
    ('India', df_india_new_cases, df_india_stringency, 2, 1),
    ('Israel', df_israel_new_cases, df_israel_stringency, 1, 2),
    ('New Zealand', df_nz_new_cases, df_nz_stringency, 1, 2),
    ('USA', df_usa_new_cases, df_usa_stringency, 1, 2),
    ('Italy', df_italy_new_cases, df_italy_stringency, 1, 2),
):
    fig, ax1 = plt.subplots()
    ax2 = ax1.twinx()

    # nanmax ignores missing values; the builtin max() returns NaN whenever the
    # first value is NaN, which would blank the whole curve.
    _values = _cases.to_numpy()
    ax1.plot(_values*100/np.nanmax(_values),label='Cases',color='r')
    ax2.plot(_stringency.to_numpy(),label='Stringency Index')

    ax1.set_title('Stringency index analysis for ' + _country)
    # The x label must be set on ax1: after twinx() the current axes is ax2,
    # whose x axis is hidden, so plt.xlabel() never showed up.
    ax1.set_xlabel('Days')
    ax1.set_ylabel('Normalized Cases')
    ax2.set_ylabel('Stringency Index')
    ax1.legend(loc=_cases_loc)
    ax2.legend(loc=_index_loc)
    # Grid stays on the twin axes, exactly where plt.grid() used to put it.
    ax2.grid()
    plt.show()
    print()





In [103]:
# Preview the first rows of the government-response table read from govt_index.csv.
df_gov.head()
Out[103]:
CountryName CountryCode RegionName RegionCode Jurisdiction Date C1_School closing C1_Flag C2_Workplace closing C2_Flag ... StringencyIndex StringencyIndexForDisplay StringencyLegacyIndex StringencyLegacyIndexForDisplay GovernmentResponseIndex GovernmentResponseIndexForDisplay ContainmentHealthIndex ContainmentHealthIndexForDisplay EconomicSupportIndex EconomicSupportIndexForDisplay
0 Aruba ABW NaN NaN NAT_TOTAL 20200101 0.0 NaN 0.0 NaN ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 Aruba ABW NaN NaN NAT_TOTAL 20200102 0.0 NaN 0.0 NaN ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 Aruba ABW NaN NaN NAT_TOTAL 20200103 0.0 NaN 0.0 NaN ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
3 Aruba ABW NaN NaN NAT_TOTAL 20200104 0.0 NaN 0.0 NaN ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
4 Aruba ABW NaN NaN NAT_TOTAL 20200105 0.0 NaN 0.0 NaN ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

5 rows × 51 columns

In [104]:
# Keep the five countries of interest and only the rows without a region name
# (presumably the national-level records — confirm against the Jurisdiction column).
df_world_index = df_gov[df_gov['CountryName'].isin(['India','Israel','United States','New Zealand','Italy' ])]
df_world_index = df_world_index[df_world_index['RegionName'].isnull()]

# Country, date and the eight containment indicators C1..C8.
df_world_index = df_world_index[[
    'CountryName',
    'Date',
    'C1_School closing',
    'C2_Workplace closing',
    'C3_Cancel public events',
    'C4_Restrictions on gatherings',
    'C5_Close public transport',
    'C6_Stay at home requirements',
    'C7_Restrictions on internal movement',
    'C8_International travel controls',
]]

# /////////////////////
# Initial State: 15 Feb 2020 - 5 Mar 2020, both bounds excluded.
# Explicit comparisons replace Series.between(..., inclusive=False): the boolean
# form of `inclusive` was removed in pandas 2.0.
df_temp = df_world_index[(df_world_index['Date'] > 20200215) & (df_world_index['Date'] < 20200305)]
df_temp = df_temp.groupby('CountryName').mean()
print(df_world_index.shape)
print(df_temp.shape)

plt.figure(figsize=(16, 6))
heatmap = sns.heatmap(df_temp.drop([ 'Date'], axis=1),  annot=True)
heatmap.set_title('Heatmap', fontdict={'fontsize':18})
plt.show()

print()
# /////////////////////
# Mid-period State: 15 Dec 2020 - 15 Jan 2021, both bounds excluded.
df_temp = df_world_index[(df_world_index['Date'] > 20201215) & (df_world_index['Date'] < 20210115)]
df_temp = df_temp.groupby('CountryName').mean()
print(df_world_index.shape)
print(df_temp.shape)

plt.figure(figsize=(16, 6))
heatmap = sns.heatmap(df_temp.drop(['Date'], axis=1),  annot=True)
heatmap.set_title('Heatmap', fontdict={'fontsize':18})
plt.show()
(2450, 10)
(5, 9)
(2450, 10)
(5, 9)
In [105]:
# /////////////////////
# Feature Analysis: rank correlation between India's containment indicators.
df_temp = df_world_index[df_world_index['CountryName']=='India'].drop('Date',axis=1)
# CountryName is a string column; drop it explicitly because DataFrame.corr no
# longer skips non-numeric columns by default in pandas 2.0 and raises instead.
corr_matrix = df_temp.drop(columns='CountryName').corr(method='spearman')
plt.figure(figsize=(16, 6))
heatmap = sns.heatmap(corr_matrix, vmin=-1, vmax=1, annot=True)
heatmap.set_title('Spearman Correlation Matrix Heatmap', fontdict={'fontsize':18})
plt.show()
In [106]:
# Time course of each containment indicator for India.
df_world_index1 = df_world_index[df_world_index['CountryName']=='India']
plt.figure(figsize=(8,5))
for _column in (
    'C1_School closing',
    'C2_Workplace closing',
    'C3_Cancel public events',
    'C4_Restrictions on gatherings',
    'C5_Close public transport',
    'C6_Stay at home requirements',
    'C7_Restrictions on internal movement',
    'C8_International travel controls',
):
    # The legend label is the column name without its 'C<n>_' prefix.
    plt.plot(df_world_index1[_column].to_numpy(), label=_column.split('_', 1)[1])

plt.title('Governmental Measures ')
plt.xlabel('Days')
plt.ylabel('Index ')
plt.grid()
# Legend sits outside the axes, to the right of the plot.
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()
In [107]:
# Reproduction Number correlation with new cases

# Spearman correlation between the two India series, printed as a 2x2 matrix.
df_temp = pd.concat([df_india_reproduction,df_india_new_cases],axis=1)
corr_matrix = df_temp.corr(method='spearman')
print(corr_matrix)
print()
fig, ax1 = plt.subplots()

ax2 = ax1.twinx()
# Cases are rescaled so the peak is 1. nanmax ignores missing values; the
# builtin max() returns NaN whenever the first value is NaN.
ax1.plot(df_india_new_cases.to_numpy()/np.nanmax(df_india_new_cases.to_numpy()),label='Cases',color='r')
ax2.plot((df_india_reproduction.to_numpy()),label='Reproduction Number')

ax1.set_title('Reproduction Number analysis for India')
# The x label must be set on ax1: after twinx() the current axes is ax2, whose
# x axis is hidden, so plt.xlabel() never showed up.
ax1.set_xlabel('Days')
ax1.set_ylabel('Normalized Cases')
ax2.set_ylabel('Reproduction Number')
ax1.legend(loc=3)
ax2.legend(loc=1)
# Grid stays on the twin axes, exactly where plt.grid() used to put it.
ax2.grid()
plt.show()
print()
                    reproduction_rate  new_cases_smoothed
reproduction_rate            1.000000           -0.528317
new_cases_smoothed          -0.528317            1.000000


In [ ]: